We will examine the impact of COVID-19 on the USA. Let's load our libraries first.

library(plotly)
library(lubridate)
library(tidyr)
library(dplyr)

Load the dataset of state-wise COVID-19 infection data.

# Read the state-level daily COVID-19 CSV into a data frame.
df <- read.csv(file = "data/us_states_covid19_daily.csv")

We need the date, the state, and the positive, hospitalized and totalTestsViral columns.

# Keep only the columns used in the rest of the analysis.
req_cols <- c("date", "state", "positive", "hospitalized", "totalTestsViral")
df_pos <- df[, req_cols]
# Replace every missing value with 0 so later aggregations and plots see no NA.
df_pos[is.na(df_pos)] <- 0
# Sanity check: sum(is.na(...)) counts individual NA cells (not rows) that are
# still present after the fill, so it is expected to print 0.
print(paste("number of NA values after fill:", sum(is.na(df_pos))))
## [1] "number of NA values after fill: 0"

Convert the date column to the Date data type.

# Parse the date column with the explicit "%Y%m%d" format. The values are
# converted to character first because as.Date() applies `format` to strings.
# No `origin` is passed: it is only consulted for numeric input and is
# silently ignored when parsing character data.
df_pos$date <- as.Date(as.character(df_pos$date), format = "%Y%m%d")
class(df_pos$date)
## [1] "Date"
# Line chart of the positive counts over time, one colour per state.
layout(
  plot_ly(
    x = df_pos$date,
    y = df_pos$positive,
    color = factor(df_pos$state),
    mode = "lines"
  ),
  title = "USA COVID-19 trends from JAN to DEC 2020"
)

Since the plot looks messy, let's find out which states are doing worst.

# Store the state column as a factor before grouping on it.
df_pos[["state"]] <- as.factor(df_pos[["state"]])
class(df_pos[["state"]])
## [1] "factor"
# Per-state totals.
# `positive`, `hospitalized` and `totalTestsViral` are running (cumulative)
# counts in the COVID Tracking Project data, so adding up the daily rows counts
# every case once per reporting day (the sum-based CA total exceeded 126
# million). The largest cumulative value per state is the actual total.
# Missing values were replaced by 0 earlier, so max() never sees NA.
# NOTE: the printed tables that follow in this document were produced with the
# earlier sum()-based aggregation and must be regenerated to show these totals.
df_state <- group_by(df_pos, state)
new_df <- data.frame(
  summarise(
    df_state,
    Total_positive_cases = max(positive),
    Total_Hospitalized = max(hospitalized),
    Total_testing = max(totalTestsViral)
  )
)
head(new_df)
##   state Total_positive_cases Total_Hospitalized Total_testing
## 1    AK              1656760              55545      78690463
## 2    AL             24282758            2622751             0
## 3    AR             13422076             861542     159758326
## 4    AS                    0                  0        249791
## 5    AZ             35894317            3215012             0
## 6    CA            126643296                  0    2310461373
# Order the per-state summary from the largest to the smallest positive total.
sorted_df <- new_df[
  order(new_df[["Total_positive_cases"]], decreasing = TRUE),
]
head(sorted_df)
##    state Total_positive_cases Total_Hospitalized Total_testing
## 6     CA            126643296                  0    2310461373
## 48    TX            114486497                  0    1014046744
## 11    FL            106829756            7070416    1178962691
## 38    NY            104056991           21377064             0
## 17    IL             59718277                  0     906112502
## 12    GA             48555886            4667944     455023678

The top 10 states with the highest number of positive cases.

# Take the first ten rows of the ranked table.
top_10 <- sorted_df[seq_len(10), ]
head(top_10)
##    state Total_positive_cases Total_Hospitalized Total_testing
## 6     CA            126643296                  0    2310461373
## 48    TX            114486497                  0    1014046744
## 11    FL            106829756            7070416    1178962691
## 38    NY            104056991           21377064             0
## 17    IL             59718277                  0     906112502
## 12    GA             48555886            4667944     455023678
# State codes of the ten highest-ranked states, as plain strings.
state_name <- as.character(top_10[["state"]])
# Compare on character values rather than factor levels.
df_pos[["state"]] <- as.character(df_pos[["state"]])
# Keep only the daily rows that belong to those ten states.
new_df_pos <- df_pos[df_pos[["state"]] %in% state_name, ]
# Same line chart as before, restricted to the selected states.
layout(
  plot_ly(
    x = new_df_pos$date,
    y = new_df_pos$positive,
    color = factor(new_df_pos$state),
    mode = "lines"
  ),
  title = "Top 10 US states COVID-19 trends from JAN to DEC 2020"
)

Choropleth map of the USA showing the number of positive cases in each state.

# Table feeding the map: one row per state with its positive-case total.
state_pop <- data.frame(
  State = new_df$state,
  Positive_case = new_df$Total_positive_cases
)

# Hover text shown for each state ("<br>" is a line break in the tooltip).
state_pop$hover <- with(
  state_pop,
  paste(State, "<br>", "positive_cases:", Positive_case)
)

# State outlines are drawn in red.
borders <- list(color = toRGB("red"))

# Geo layout: restrict the map to the USA and draw lakes in white.
map_options <- list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showlakes = TRUE,
  lakecolor = toRGB("white")
)

# The map is coloured by positive-case totals, not by a positivity rate
# (positives divided by tests), so the title names the plotted quantity.
plot_ly(
  state_pop,
  z = state_pop$Positive_case,
  text = state_pop$hover,
  locations = state_pop$State,
  type = "choropleth",
  locationmode = "USA-states",
  color = state_pop$Positive_case,
  colorscale = "Reds",
  marker = list(line = borders)
) %>%
  layout(title = "USA COVID-19 positive cases by state", geo = map_options)